home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Dr. Windows 3
/
dr win3.zip
/
dr win3
/
NEW_TECH
/
GREPS.ZIP
/
DFA.C
< prev
next >
Wrap
C/C++ Source or Header
|
1993-09-17
|
60KB
|
2,276 lines
/* dfa.c - determinisitic extended regexp routines for GNU
Copyright (C) 1988 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
/* Written June, 1988 by Mike Haertel
Modified July, 1988 by Arthur David Olson to assist BMG speedups */
#include <stdio.h>
#include <assert.h>
#if defined(USG) || defined(STDC_HEADERS)
#include <string.h>
#ifndef index
#define index strchr
#endif
#else
#include <string.h>
#endif
#include "dfa.h"
#if __STDC__
typedef void *ptr_t;
#else
typedef char *ptr_t;
#endif
static void regmust();
static ptr_t
xcalloc(n, s)
int n;
size_t s;
{
ptr_t r = calloc(n, s);
if (!r)
regerror("Memory exhausted");
return r;
}
ptr_t /* Not static, so alloca.o can use it. */
xmalloc(n)
size_t n;
{
ptr_t r = malloc(n);
assert(n != 0);
if (!r)
regerror("Memory exhausted");
return r;
}
static ptr_t
xrealloc(p, n)
ptr_t p;
size_t n;
{
ptr_t r = realloc(p, n);
assert(n != 0);
if (!r)
regerror("Memory exhausted");
return r;
}
#define CALLOC(p, t, n) ((p) = (t *) xcalloc((n), sizeof (t)))
#define MALLOC(p, t, n) ((p) = (t *) xmalloc((n) * sizeof (t)))
#define REALLOC(p, t, n) ((p) = (t *) xrealloc((ptr_t) (p), (n) * sizeof (t)))
/* Reallocate an array of type t if nalloc is too small for index. */
#define REALLOC_IF_NECESSARY(p, t, nalloc, index) \
if ((index) >= (nalloc)) \
{ \
while ((index) >= (nalloc)) \
(nalloc) *= 2; \
REALLOC(p, t, nalloc); \
}
#ifdef DEBUG
#include <stdio.h>
static void
prtok(t)
_token t;
{
char *s;
if (t < 0)
fprintf(stderr, "END");
else if (t < _NOTCHAR)
fprintf(stderr, "%c", t);
else
{
switch (t)
{
case _EMPTY: s = "EMPTY"; break;
case _BACKREF: s = "BACKREF"; break;
case _BEGLINE: s = "BEGLINE"; break;
case _ALLBEGLINE: s = "ALLBEGLINE"; break;
case _ENDLINE: s = "ENDLINE"; break;
case _ALLENDLINE: s = "ALLENDLINE"; break;
case _BEGWORD: s = "BEGWORD"; break;
case _ENDWORD: s = "ENDWORD"; break;
case _LIMWORD: s = "LIMWORD"; break;
case _NOTLIMWORD: s = "NOTLIMWORD"; break;
case _QMARK: s = "QMARK"; break;
case _STAR: s = "STAR"; break;
case _PLUS: s = "PLUS"; break;
case _CAT: s = "CAT"; break;
case _OR: s = "OR"; break;
case _LPAREN: s = "LPAREN"; break;
case _RPAREN: s = "RPAREN"; break;
default: s = "SET"; break;
}
fprintf(stderr, "%s", s);
}
}
#endif /* DEBUG */
/* Stuff pertaining to charsets. */
static int
tstbit(b, c)
int b;
_charset c;
{
return c[b / INTBITS] & 1 << b % INTBITS;
}
static void
setbit(b, c)
int b;
_charset c;
{
c[b / INTBITS] |= 1 << b % INTBITS;
}
static void
clrbit(b, c)
int b;
_charset c;
{
c[b / INTBITS] &= ~(1 << b % INTBITS);
}
static void
copyset(src, dst)
const _charset src;
_charset dst;
{
int i;
for (i = 0; i < _CHARSET_INTS; ++i)
dst[i] = src[i];
}
static void
zeroset(s)
_charset s;
{
int i;
for (i = 0; i < _CHARSET_INTS; ++i)
s[i] = 0;
}
static void
notset(s)
_charset s;
{
int i;
for (i = 0; i < _CHARSET_INTS; ++i)
s[i] = ~s[i];
}
static int
equal(s1, s2)
const _charset s1;
const _charset s2;
{
int i;
for (i = 0; i < _CHARSET_INTS; ++i)
if (s1[i] != s2[i])
return 0;
return 1;
}
/* A pointer to the current regexp is kept here during parsing. */
static struct regexp *reg;
/* Find the index of charset s in reg->charsets, or allocate a new charset. */
static int
charset_index(s)
const _charset s;
{
int i;
for (i = 0; i < reg->cindex; ++i)
if (equal(s, reg->charsets[i]))
return i;
REALLOC_IF_NECESSARY(reg->charsets, _charset, reg->calloc, reg->cindex);
++reg->cindex;
copyset(s, reg->charsets[i]);
return i;
}
/* Syntax bits controlling the behavior of the lexical analyzer. */
static syntax_bits, syntax_bits_set;
/* Flag for case-folding letters into sets. */
static case_fold;
/* Entry point to set syntax options. */
void
regsyntax(bits, fold)
int bits;
int fold;
{
syntax_bits_set = 1;
syntax_bits = bits;
case_fold = fold;
}
/* Lexical analyzer. */
static const char *lexstart; /* Pointer to beginning of input string. */
static const char *lexptr; /* Pointer to next input character. */
static lexleft; /* Number of characters remaining. */
static caret_allowed; /* True if backward context allows ^
(meaningful only if RE_CONTEXT_INDEP_OPS
is turned off). */
static closure_allowed; /* True if backward context allows closures
(meaningful only if RE_CONTEXT_INDEP_OPS
is turned off). */
/* Note that characters become unsigned here. */
#define FETCH(c, eoferr) \
{ \
if (! lexleft) \
if (eoferr) \
regerror(eoferr); \
else \
return _END; \
(c) = (unsigned char) *lexptr++; \
--lexleft; \
}
static _token
lex()
{
_token c, c2;
int invert;
_charset cset;
FETCH(c, (char *) 0);
switch (c)
{
case '^':
if (! (syntax_bits & RE_CONTEXT_INDEP_OPS)
&& (!caret_allowed ||
(syntax_bits & RE_TIGHT_VBAR) && lexptr - 1 != lexstart))
goto normal_char;
caret_allowed = 0;
return syntax_bits & RE_TIGHT_VBAR ? _ALLBEGLINE : _BEGLINE;
case '$':
if (syntax_bits & RE_CONTEXT_INDEP_OPS || !lexleft
|| (! (syntax_bits & RE_TIGHT_VBAR)
&& ((syntax_bits & RE_NO_BK_PARENS
? lexleft > 0 && *lexptr == ')'
: lexleft > 1 && *lexptr == '\\' && lexptr[1] == ')')
|| (syntax_bits & RE_NO_BK_VBAR
? lexleft > 0 && *lexptr == '|'
: lexleft > 1 && *lexptr == '\\' && lexptr[1] == '|'))))
return syntax_bits & RE_TIGHT_VBAR ? _ALLENDLINE : _ENDLINE;
goto normal_char;
case '\\':
FETCH(c, "Unfinished \\ quote");
switch (c)
{
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
caret_allowed = 0;
closure_allowed = 1;
return _BACKREF;
case '<':
caret_allowed = 0;
return _BEGWORD;
case '>':
caret_allowed = 0;
return _ENDWORD;
case 'b':
caret_allowed = 0;
return _LIMWORD;
case 'B':
caret_allowed = 0;
return _NOTLIMWORD;
case 'w':
case 'W':
zeroset(cset);
for (c2 = 0; c2 < _NOTCHAR; ++c2)
if (ISALNUM(c2))
setbit(c2, cset);
if (c == 'W')
notset(cset);
caret_allowed = 0;
closure_allowed = 1;
return _SET + charset_index(cset);
case '?':
if (syntax_bits & RE_BK_PLUS_QM)
goto qmark;
goto normal_char;
case '+':
if (syntax_bits & RE_BK_PLUS_QM)
goto plus;
goto normal_char;
case '|':
if (! (syntax_bits & RE_NO_BK_VBAR))
goto or;
goto normal_char;
case '(':
if (! (syntax_bits & RE_NO_BK_PARENS))
goto lparen;
goto normal_char;
case ')':
if (! (syntax_bits & RE_NO_BK_PARENS))
goto rparen;
goto normal_char;
default:
goto normal_char;
}
case '?':
if (syntax_bits & RE_BK_PLUS_QM)
goto normal_char;
qmark:
if (! (syntax_bits & RE_CONTEXT_INDEP_OPS) && !closure_allowed)
goto normal_char;
return _QMARK;